%include "I:\workdata\706727\Build\Directories_And_SampleRestrictions.sas";
%include "I:\workdata\706727\Build\Programs.sas";
libname educform "\\srvfsenas1\data\Formater\SAS formater i Danmarks Statistik\SAS_datasaet\Uddannelser";

/* Basedata */
data base (drop=befupdSourceYear);
set raw.befupdv (keep= pnr befupdSourceYear koen kom IE_TYPE FOED_DAG where=(befupdSourceyear>1998 and befupdSourceYear<2020 and befupdSourceyear-year(FOED_DAG)<=60 and befupdSourceyear-year(FOED_DAG)>=12));
year=BEFupdSourceYear;
length year 8;
birthmonth=month(FOED_DAG);
birthyear=year(FOED_DAG);
run;

%macro monthdata; 
  %do t=1 %to 12;
	  data base&t;
	  set base;
	  month=&t;
	  run;
%end;
%mend monthdata;
%monthdata;

data basedata;
set %listsets(base,1,12);
run;

proc datasets;
delete base1 base2 base3 base4 base5 base6 base7 base8 base9 base10 base11 base12;
quit;

/* Income */
data ind;
set raw.indupd01v (keep=pnr indupd01sourceyear perindkialt_13 qaktivf_ny05 qpassivn kontanthj_13 loenmv_13 offpens);
rename indupd01sourceyear=year;
rename perindkialt_13=inc;
rename loenmv_13=earn;
rename qaktivf_ny05=assets;
rename qpassivn=liabilities;
run;

proc sort data=ind nodup; by pnr year; run;

data ind(drop=lagpnr lagyear);
set ind;
lagpnr=lag(pnr);
lagyear=lag(year);
if lagyear=year and lagpnr=pnr then delete;
run;

/* Education */
data educ(drop=uddaupd01sourceyear);
set raw.uddaupd01v (keep=pnr uddaupd01sourceyear hfaudd udd udd_n where=(uddaupd01sourceyear>1999));
year=uddaupd01sourceyear;
igudd=coalesce(udd,udd_n);
run;

data educformat1(drop=start);
set educform.c_audd2014_l1l5_k;
hfaudd=start*1;
rename audd2014_l1l5_k=educg;
run;

data educformat2(drop=start);
set educform.c_udd2014_l1l5_k;
igudd=start*1;
rename udd2014_l1l5_k=iggeducg;
run;

proc sql;
create table feduc(where=(hfaudd ne .)) as select * from educ as a

left join educformat1 as b on a.hfaudd=b.hfaudd
left join educformat2 as c on a.igudd=c.igudd;
quit;

/* Sick leave */
data sickdays;
set raw.sgdpupdv;
regyear=sgdpupdsourceyear;
if regyear<2007 then year=substr(FOERFRAV_C,1,4)*1;
if regyear>=2007 then year=year(FOERFRAV);
if regyear<2007 then month=substr(FOERFRAV_C,5,2)*1;
if regyear>=2007 then month=month(FOERFRAV);
if regyear<2007 then day=substr(FOERFRAV_C,7,2)*1;
if regyear>=2007 then day=day(FOERFRAV);
run;

proc sql;
create table sickdays2(where=(regyear=maxregyear)) as select *,max(regyear) as maxregyear
from sickdays 
group by pnr, year, month, day
order by pnr, year, month, day;
quit;

data sickdays; 
set sickdays2(where=((fravdage>=21 and year<2012) or (fravdage>=30 and year>=2012)));
fromdate=mdy(month,day,year);
todate=fromdate+fravdage;
fromyrm=year(fromdate)+(month(fromdate)-1)/12;
toyrm=year(todate)+(month(todate)-1)/12;
rename fravdage=sickdays;
startsick=1;
run;

%macro sickdata; 
 %do y=1999 %to 2019;
  %do t=1 %to 12;
  	%let yrm=%sysevalf((&y+(&t-1)/12));
	%let yrm=round(&yrm,0.01);
	  data sick&y&t(keep=pnr year month sick_sgdp sickdays_sgdp);
	  set sickdays (where=(fromyrm<=&yrm. and toyrm>=&yrm.));
	  year=&y;
	  month=&t;
	  sick_sgdp=1;
	  if round(fromyrm,0.01)=&yrm. then sickdays_sgdp=sickdays;
	  run;

	proc sql;
	create table temp as select pnr,year,month,max(sick_sgdp) as sick_sgdp,sum(sickdays_sgdp) as sickdays_sgdp from sick&y&t
	group by pnr, year,month
	order by pnr, year,month;
	quit;
	
	data sick&y&t;
	set temp;
	run;

	%end;
%end;
%mend sickdata;
%sickdata;

%macro listsick(var,start1,end1,start2,end2);
%do i=&start1. %to &end1.;
	%do j=&start2. %to &end2.;
		&var.&i.&j.
	%end;
%end;
%mend;

data fsickdata;
set %listsick(sick,1999,2019,1,12);
run;

proc datasets;
delete  lon %listsick(sick,1999,2019,1,12);
quit;

/* Dream data */
%macro listgradvars(sty,eny); 
 %do y=&sty. %to &eny.;
  %do t=1 %to 12;
  %if &t.<10 %then	GRAD_&y._0&t.;
  %else GRAD_&y._&t.;
	%end;
%end;
%mend listgradvars;

%macro newgradvars(sty,eny); 
 %do y=&sty. %to &eny.;
  %do t=1 %to 12;
  %if &t.<10 %then	GRAD&y.0&t.=GRAD_&y._0&t.*1;
  %else GRAD&y.&t.=GRAD_&y._&t.*1;;
  %end;
%end;
%mend newgradvars;

%macro listyvars(sty,eny); 
 %do y=&sty. %to &eny.;
 %let y2=%substr("&y.",4,2);
  %do t=1 %to 52;
  %if &t.<10 %then	y_&y2.0&t.;
  %else y_&y2.&t.;
  %end;
%end;
%mend listyvars;

%macro newyvars(sty,eny); 
 %do y=&sty. %to &eny.;
 %let y2=%substr("&y.",4,2);
  %do t=1 %to 52;
  %if &t.<10 %then	y&y2.0&t.=y_&y2.0&t.*1;
  %else y&y2.&t.=y_&y2.&t.*1;;
  %end;
%end;
%mend newyvars;

/* Employment */
data dreamgrad(drop=%listgradvars(2008,2019));
set raw.dreamv (keep= pnr  %listgradvars(2008,2019));
%newgradvars(2008,2019);
run;

proc transpose data=dreamgrad out=dreamgradw ;
by pnr;
var _numeric_;
run;

data dreamgradw(keep=pnr year month grad);
set dreamgradw;
grad=col1*160.33;
year=substr(_NAME_,5,4)*1;
month=substr(_NAME_,9,2)*1;
run;

/* Transfers */
data dreamyd(drop=%listyvars(1999,2019));
set raw.dreamv (keep= pnr %listyvars(1999,2019));
%newyvars(1999,2019);
run;

proc transpose data=dreamyd out=dreamw ;
by pnr;
var _numeric_;
run;

data dreamw;
set dreamw;
rename col1=yd;
if substr(_NAME_,2,2)="99" then year=1999;
if substr(_NAME_,2,2)~="99" then year=2000+substr(_NAME_,2,2)*1;
week=substr(_NAME_,4,2)*1;
run;

data dreamw;
set dreamw(where=(yd~=997 and yd~=998 and yd~=999));
date=intnx('week',mdy(1,1,year),week-1,'m');
format date date9.;
month=month(date);
fyd=floor(yd/10);
if fyd=65 then SUeduc=1;
if fyd=72 or fyd=14 then educhelp=1;
if fyd=11 then Dagp_unemp=1;
if fyd=13 then Cash_unemp=1;
if fyd=21 then Dagp_act=1;
if fyd=41 then Sabbat=1;
if fyd=70 or fyd=16 then intyd=1;
if fyd=71 then Cash_int=1;
if fyd=73 then Cash_unemp2=1;
if fyd=15 then cash_dagexp=1;
if fyd=74 then ledyd=1;
if fyd=75 then forre=1;
if fyd=76 then re=1;
if fyd=77 then flexjob=1;
if fyd=87 then jobafk=1;
if fyd=81 then resf=1;
if yd=781 then skaane=1;
if yd=783 then disab=1;
if fyd=89 then sick=1;
if fyd=88 then parleave=1;
if yd ne . then sometrans=1;
run;

proc sql;
create table dreamfinal as select pnr, year, month
,max(SUeduc) as SUeduc,max(educhelp) as educhelp
,max(Dagp_unemp) as Dagp_unemp,max(Cash_unemp) as Cash_unemp
,max(Dagp_act) as Dagp_act,max(intyd) as intyd,max(cash_dagexp) as cash_dagexp
,max(Cash_int) as Cash_int,max(Cash_unemp2) as Cash_unemp2
,max(ledyd) as ledyd,max(forre) as forre,max(re) as re
,max(jobafk) as jobafk,max(resf) as resf
,max(flexjob) as flexjob,max(skaane) as skaane,max(disab) as disab
,max(sick) as sick,max(parleave) as parleave,max(sometrans) as sometrans from dreamw

group by pnr, year, month;
quit;

/** LON register absenteeism **/
data lon;
set raw.lonv raw.lonnupdv;
fromyear=year(REF_FRADATO);
toyear=year(REF_TILDATO);
frommonth=month(REF_FRADATO);
tomonth=month(REF_TILDATO);
fromyrm=round(fromyear+(frommonth-1)/12,0.01);
toyrm=round(toyear+(tomonth-1)/12,0.01);
run;

%macro empdata; 
 %do y=1999 %to 2019;
  %do t=1 %to 12;
  	%let yrm=%sysevalf((&y+(&t-1)/12));
	%let yrm=round(&yrm,0.01);
	  data lon&y&t(keep=pnr year month emp absh workh);
	  set lon (where=(fromyrm<=&yrm. and toyrm>=&yrm.));
	  year=&y;
	  month=&t;
	  emp=1;
	  absh=timfra/((toyear-fromyear+1)*12+tomonth-frommonth+1);
	  workh=timprae/((toyear-fromyear+1)*12+tomonth-frommonth+1);
	  run;
	%end;
%end;
%mend empdata;
%empdata;

%macro listemp(var,start1,end1,start2,end2);
%do i=&start1. %to &end1.;
	%do j=&start2. %to &end2.;
		&var.&i.&j.
	%end;
%end;
%mend;

data empdata;
set %listemp(lon,1999,2019,1,12);
run;

proc sql;
create table  fempdata as select distinct pnr, year, max(emp) as emp, sum(absh) as absh, sum(workh) as workh from empdata
group by pnr, year
order by pnr, year;
quit;

proc datasets;
delete  lon %listemp(lon,1999,2019,1,12);
quit;

/*** Merge ***/
proc sql;
create table extradata as select * from basedata as a

left join ind as b on a.pnr=b.pnr and a.year=b.year
left join fempdata as c on a.pnr=c.pnr and a.year=c.year 
left join dreamfinal as d on a.pnr=d.pnr and a.year=d.year and a.month=d.month
left join dreamgradw as e on a.pnr=e.pnr and a.year=e.year and a.month=e.month
left join fsickdata as f on a.pnr=f.pnr and a.year=f.year and a.month=f.month
left join feduc(where=(pnr ne "" and educg ne "")) as g on a.pnr=g.pnr and a.year=g.year;
quit;

proc export data=extradata
outfile = "J:\Workdata\706727\Temp\extradata.dta"
replace;
run;

